User input: Prepare session.

To run this R Markdown file, enter the following lines in R:

library(rmarkdown)
library(knitr)

Age of the dataset.

age <- "GW14"

Choose an upper limit for nGenes and nUMIs for zoomed-in plots.

nGene.limit <- 3500
nUMI.limit <- 10000

Indicate the path to your list of ribosomal gene names.

ribogenes <- read_table("/kriegsteinlab/data1/carmen/2nd-trimester/qc/ribogenes.txt", col_names = F)$X1

Path to your dataset (in this case, an .RData workspace file), and the name of the Seurat object contained within the workspace. We load the data before running the .Rmd file as this takes a long time, and is otherwise done everytime the script is run (immune to caching.)

dataset <- "/kriegsteinlab/data1/aparna/homefiles/gw14.RData"
load(dataset)

seurat.obj <- "gw14"

Indicate the path to a metadata file with cell names and their brain regions of origin.

meta <- read_tsv("/kriegsteinlab/data1/carmen/2nd-trimester/metadata/colnames_GW14_080818_fixed.txt", 
                   col_names = c("cell.name", "brain.region"))

Call the knitr::render function indicating the path to this .Rmd file:

render("/kriegsteinlab/data1/carmen/2nd-trimester/qc/code/QC_10X_plots_2018-08-30.Rmd")

Begin R Markdown script:

knitr::opts_chunk$set(cache = TRUE, warning = FALSE, message = FALSE, cache.lazy = FALSE)

Load libraries

library(Seurat)
library(readr)
library(ggplot2)
library(scales)
library(viridis)
library(tidyr)
library(dplyr)
library(janitor)

Create a new Seurat object using the raw data in the original Seurat object.

For compatibility issues with the new version of Seurat.

assign("seurat.obj.new", CreateSeuratObject(raw.data = get(seurat.obj)@raw.data, min.cells = 0, min.genes = 0, project=age))

rm(list=seurat.obj)

Declare multiplot function.

multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
  
  library(grid)

  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)

  numPlots = length(plots)

  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # Make the panel
    # ncol: Number of columns of plots
    # nrow: Number of rows needed, calculated from # of cols
    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
                    ncol = cols, nrow = ceiling(numPlots/cols))
  }

 if (numPlots==1) {
    print(plots[[1]])

  } else {
    # Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))

    # Make each plot, in the correct location
    for (i in 1:numPlots) {
      # Get the i,j matrix positions of the regions that contain this subplot
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))

      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}

Set the brain region of cells as their original identity in both the @ident and the @meta.data slots.

seurat.obj.new@meta.data$orig.ident <- factor(meta$brain.region)

seurat.obj.new <- SetIdent(seurat.obj.new, cells.use = WhichCells(seurat.obj.new), ident.use = meta$brain.region)
summary(seurat.obj.new@meta.data)
##      nGene             nUMI              orig.ident   
##  Min.   :  41.0   Min.   :  191   motor       :20053  
##  1st Qu.: 369.0   1st Qu.:  580   occipital   :14200  
##  Median : 507.0   Median :  855   MGE         :13241  
##  Mean   : 579.9   Mean   : 1056   hypothalamus:13141  
##  3rd Qu.: 684.0   3rd Qu.: 1253   LGE         :12771  
##  Max.   :8426.0   Max.   :60900   CGE         :11483  
##                                   (Other)     :13990

Generate table with number and percentage of cells in each brain region.

seurat.obj.new@meta.data %>% tabyl(orig.ident) %>% adorn_totals(where="row") %>% arrange(desc(n)) 

Seurat’s ViolinPlot plotting by area.

(Not working currently.)

violin.plot <- VlnPlot(seurat.obj.new, features.plot = c("nGene", "nUMI"), do.sort = T, point.size.use = .5, ident.include = as.character(statsByArea$orig.ident))

Calculate % of mitochondrial genes in each cell.

mito.genes <- grep(pattern="^MT-", x=rownames(seurat.obj.new@data), value = TRUE)
percent.mito <- colSums(seurat.obj.new@raw.data[mito.genes, ])/colSums(seurat.obj.new@raw.data)

seurat.obj.new <- AddMetaData(object = seurat.obj.new, metadata = percent.mito, col.name = "percent.mito")

Calculate % of ribosomal genes in each cell.

ribo <- seurat.obj.new@raw.data[ribogenes, ]
percent.ribo <- colSums(ribo)/colSums(seurat.obj.new@raw.data)

seurat.obj.new <- AddMetaData(object = seurat.obj.new, metadata = percent.ribo, col.name = "percent.ribo")

Calculate mean UMIs, genes, ribosomal and mitochondrial % for each brain region.

statsByArea <- seurat.obj.new@meta.data %>% dplyr::group_by(orig.ident) %>% 
  summarise(mean.nGene = mean(nGene), mean.nUMI =mean(nUMI), mean.pctmito = mean(percent.mito), mean.pctribo= mean(percent.ribo))

statsByArea

PLOTS

1. Violin plot: Genes per cell, split by brain region.

seurat.obj.new@meta.data$orig.ident <- ordered(seurat.obj.new@meta.data$orig.ident, 
                                       levels = (statsByArea %>% arrange(mean.nGene))$orig.ident)

p1 <- ggplot(seurat.obj.new@meta.data) + 
  geom_jitter(aes(orig.ident, nGene, colour=orig.ident), alpha=0.2, size=0.25) + 
  geom_violin(aes(orig.ident, nGene), colour="grey60", scale = "width", alpha=0.3) +
  scale_color_manual(values = c("#E58606", "#5D69B1", "#52BCA3", "#99C945", "#ffc300" ,"#ED645A", "#2F8AC4", "#764E9F", "#E58606", "#5D69B1", "#52BCA3", "#99C945")) +
  theme_minimal() +
  theme(plot.title = element_text(size= 12, hjust = 0.5), 
        axis.title.y=element_text(vjust = 2),
        axis.title.x=element_text(vjust = -1),
        axis.text.x = element_text(angle = 30)
        ) +
  scale_y_continuous(breaks=seq(0, max(seurat.obj.new@meta.data$nGene), by=1000)) +
  scale_x_discrete(breaks=(statsByArea %>% arrange(mean.nGene))$orig.ident) +
  labs(title=paste0(age, ": Genes per Cell"),
       x="", 
       y="Genes per Cell"
      ) +
  guides(fill=FALSE, colour=FALSE)

p1

2. Violin plot: Genes per cell [zoom-in].

p2 <- ggplot(seurat.obj.new@meta.data) + 
  geom_jitter(aes(orig.ident, nGene, colour=orig.ident), alpha=0.2, size=0.25) + 
  geom_violin(aes(orig.ident, nGene), colour="grey60", scale = "width", alpha=0.3) +
  scale_color_manual(values = c("#E58606", "#5D69B1", "#52BCA3", "#99C945", "#ffc300" ,"#ED645A", "#2F8AC4", "#764E9F", "#E58606", "#5D69B1", "#52BCA3", "#99C945")) +
  theme_minimal() +
  theme(plot.title = element_text(size= 12, hjust = 0.5), 
        axis.title.y=element_text(vjust = 2),
        axis.title.x=element_text(vjust = -1),
        axis.text.x = element_text(angle = 30)
        ) +
  scale_y_continuous(breaks=seq(0, nGene.limit, by=nGene.limit/10), limits = c(0, nGene.limit)) +
  scale_x_discrete(breaks=(statsByArea %>% arrange(mean.nGene))$orig.ident) +
  labs(title=paste0(age, ": Genes per Cell (Zoom)"),
       x="", 
       y="Genes per Cell"
      ) +
  guides(fill=FALSE, colour=FALSE)
  
p2

3. Violin plot: UMIs per cell, split by brain region.

p3 <- 
  ggplot(seurat.obj.new@meta.data) + 
  geom_jitter(aes(orig.ident, nUMI, colour=orig.ident), alpha=0.2, size=0.25) + 
  scale_color_manual(values = c("#E58606", "#5D69B1", "#52BCA3", "#99C945", "#ffc300" ,"#ED645A", "#2F8AC4", "#764E9F", "#E58606", "#5D69B1", "#52BCA3", "#99C945")) +
  geom_violin(aes(orig.ident, nUMI), colour="grey60", colour="grey30", alpha=0.3, scale="width") +
  theme_minimal() +
  theme(plot.title = element_text(size= 12, hjust = 0.5), 
        axis.title.y=element_text(vjust = 2),
        axis.title.x=element_text(vjust = -1),
        axis.text.x = element_text(angle = 30)
        ) +
  scale_y_continuous(breaks=seq(0, max(seurat.obj.new@meta.data$nUMI), by=10000)) +
  labs(title=paste0(age, ": UMIs per Cell"),
       x="", 
       y="UMIs per Cell"
      ) +
  guides(fill=FALSE, colour=FALSE)

p3

4. Violin plot: UMIs per cell [Zoom-in].

p4 <- 
  ggplot(seurat.obj.new@meta.data) + 
  geom_jitter(aes(orig.ident, nUMI, colour=orig.ident), alpha=0.2, size=0.25) + 
  scale_color_manual(values = c("#E58606", "#5D69B1", "#52BCA3", "#99C945", "#ffc300" ,"#ED645A", "#2F8AC4", "#764E9F", "#E58606", "#5D69B1", "#52BCA3", "#99C945")) +
  geom_violin(aes(orig.ident, nUMI), colour="grey60",  colour="grey60", alpha=0.3, scale="width") +
  theme_minimal() +
  theme(plot.title = element_text(size= 12, hjust = 0.5), 
        axis.title.y=element_text(vjust = 2),
        axis.title.x=element_text(vjust = -1),
        axis.text.x = element_text(angle = 30)
        ) +
  # ylim(0, 40000) +
  scale_y_continuous(breaks=seq(0, nUMI.limit, by=nUMI.limit/10), limits=c(0,  nUMI.limit)) +
  labs(title=paste0(age, ": UMIs per Cell (Zoom)"),
       x="", 
       y="UMIs per Cell"
      ) +
  guides(fill=FALSE, colour=FALSE)

p4

5. Violin plot: Genes per cell + color by nUMI.

seurat.obj.new@meta.data <- seurat.obj.new@meta.data %>% arrange(nUMI)

p5 <- ggplot(seurat.obj.new@meta.data %>% filter(nGene < nGene.limit)) + 
  geom_jitter(aes(orig.ident, nGene, colour=log10(nUMI)), alpha=0.6, size=0.25) + 
  scale_color_viridis(option="magma", begin = .1, end = .95) +
  geom_violin(aes(orig.ident, nGene), colour="grey60",  colour="grey60", alpha=0.1, scale="width") +
  theme_minimal() +
  theme(plot.title = element_text(size= 12, hjust = 0.5), 
        axis.title.y=element_text(vjust = 2),
        axis.title.x=element_text(vjust = -1),
        legend.position=c(0.9, 1),
        legend.key.size = unit(0.5,"cm"),
        legend.direction = "horizontal",
        legend.text = element_text(angle = 30, size = 7),
        legend.title = element_text(size = 6),
        axis.text.x = element_text(angle = 30)
        # legend.title.align = 0.9
        ) +
  # ylim(0, 40000) +
  scale_y_continuous(breaks=seq(0, nGene.limit, by=nGene.limit/10), limits=c(0, nGene.limit)) +
  labs(title=paste0(age, ": Genes per Cell / log UMIs per Cell"),
       x="", 
       y="Genes Detected",
       color= "log10 UMI")

p5

6. Violin plot: UMIs per cell + color by nGenes.

seurat.obj.new@meta.data <- seurat.obj.new@meta.data %>% arrange(nGene)

p6 <- ggplot(seurat.obj.new@meta.data %>% filter(nUMI < nUMI.limit)) + 
  geom_jitter(aes(orig.ident, nUMI, colour=nGene), alpha=0.75, size=0.25) + 
  scale_color_viridis(option="magma", begin = .1, end = .9) +
  geom_violin(aes(orig.ident, nUMI), colour="grey60", alpha=0.1, scale="width") +
  theme_minimal() +
  theme(plot.title = element_text(size= 12, hjust = 0.5), 
        axis.title.y=element_text(vjust = 2),
        axis.title.x=element_text(vjust = -1),
        legend.position=c(0.9,1),
        legend.key.size = unit(0.5,"cm"),
        legend.direction = "horizontal",
        legend.text = element_text(angle = 30, size = 7),
        legend.title = element_text(size = 12),
        axis.text.x = element_text(angle = 30)
        # legend.title.align = 0.9
        ) +
  # ylim(0, 40000) +
  scale_y_continuous(breaks=seq(0, nUMI.limit, by=nUMI.limit/10), limits=c(0, nUMI.limit)) +
  labs(title=paste0(age, ": UMIs per Cell / Genes per Cell"),
       x="", 
       y="UMIs Detected",
       color= "")

p6

7. % Ribosomal, split by brain region.

p7 <- 
ggplot(seurat.obj.new@meta.data) + 
  geom_jitter(aes(orig.ident, percent.ribo, colour=orig.ident), alpha=0.2, size=0.25) + 
  scale_color_manual(values = c("#E58606", "#5D69B1", "#52BCA3", "#99C945", "#ffc300" ,"#ED645A", "#2F8AC4", "#764E9F", "#E58606", "#5D69B1", "#52BCA3", "#99C945")) +
  geom_violin(aes(orig.ident, percent.ribo), colour="grey60", alpha=0.3, scale="width") +
  theme_minimal() +
  theme(plot.title = element_text(size= 12, hjust=0.5), 
        axis.title.y=element_text(vjust = 2),
        axis.title.x=element_text(vjust = -1),
        axis.text.x = element_text(angle = 30)
        ) +
  # scale_y_continuous(breaks=seq(0,40000, by=5000)) +
  # ylim(0,40000) +
  labs(title=paste0(age, ": % Ribosomal Genes per Cell"),
       x="", 
       y="% Ribosomal Genes"
      ) +
  guides(fill=FALSE, colour=FALSE)

p7

8. % Mitochondrial, split by brain region.

p8 <- 
  ggplot(seurat.obj.new@meta.data) + 
  geom_jitter(aes(orig.ident, percent.mito, colour=orig.ident), alpha=0.2, size=0.25) + 
  scale_color_manual(values = c("#E58606", "#5D69B1", "#52BCA3", "#99C945", "#ffc300" ,"#ED645A", "#2F8AC4", "#764E9F", "#E58606", "#5D69B1", "#52BCA3", "#99C945")) +
  geom_violin(aes(orig.ident, percent.mito), colour="grey60", alpha=0.3, scale="width") +
  theme_minimal() +
  theme(plot.title = element_text(size= 12, hjust=0.5), 
        axis.title.y=element_text(vjust = 2),
        axis.title.x=element_text(vjust = -1),
        axis.text.x = element_text(angle = 30)
        ) +
  # scale_y_continuous(breaks=seq(0,0.1, by=0.01), limits = c(0, 0.1)) +
  
  labs(title=paste0(age, ": % Mitochondrial Genes per Cell"),
       x="", 
       y="% Mitochondrial Genes"
      ) +
  guides(fill=FALSE, colour=FALSE)

p8

9. Violin plot: Genes per cell; color by % ribosomal genes.

seurat.obj.new@meta.data <- seurat.obj.new@meta.data %>% arrange(percent.ribo)

ggplot(seurat.obj.new@meta.data) + 
  geom_jitter(aes(orig.ident, nGene, colour=percent.ribo), alpha=0.6, size=0.25) +
  geom_violin(aes(orig.ident, nGene), colour="grey40", alpha=0.1, scale="width") +
  scale_color_viridis(option="magma", begin = .15, end = .9) +
  theme_minimal() +
  theme(plot.title = element_text(size= 12, hjust = 0.5), 
        axis.title.y=element_text(vjust = 2),
        axis.title.x=element_text(vjust = -1),
        axis.text.x = element_text(angle = 30)
        ) +
  scale_y_continuous(breaks=seq(0, max(seurat.obj.new@meta.data$nGene), by=1000)) +
  scale_x_discrete(breaks=(statsByArea %>% arrange(mean.nGene))$orig.ident) +
  labs(title=paste0(age, ": Genes Detected per Cell"),
       x="Area", 
       y="nGene"
      )

10. Violin plot: Genes per cell; color by % ribosomal genes [zoom-in]

seurat.obj.new@meta.data <- seurat.obj.new@meta.data %>% arrange(percent.ribo)

p10 <- ggplot(seurat.obj.new@meta.data %>% filter(nGene < nGene.limit)) + 
  geom_jitter(aes(orig.ident, nGene, colour=percent.ribo), alpha=0.8, size=0.25) +
  geom_violin(aes(orig.ident, nGene), colour="grey40", alpha=0.1, scale="width") +
  scale_color_viridis(option="magma", begin = .1, end = .95) +
  theme_minimal() +
  theme(plot.title = element_text(size= 12, hjust = 0.5), 
        axis.title.y=element_text(vjust = 2),
        axis.title.x=element_text(vjust = -1),
        legend.position=c(0.85,1),
        legend.key.size = unit(0.5,"cm"),
        legend.direction = "horizontal",
        legend.text = element_text(angle = 30, size = 7),
        legend.title = element_text(size = 8),
        axis.text.x = element_text(angle = 30)
        # legend.title.align = 0.9
        ) +
  scale_y_continuous(breaks=seq(0, nGene.limit, by=nGene.limit/10), limits = c(0, nGene.limit)) +
  scale_x_discrete(breaks=(statsByArea %>% arrange(mean.nGene))$orig.ident) +
  labs(title=paste0(age, ": Genes per Cell / % Ribosomal Genes\n(zoom-in)"),
       x="", 
       y="Genes per Cell",
       color="% ribosomal"
      )

p10

11. Violin Plot: Genes per cell, color by % mitochondrial genes

seurat.obj.new@meta.data <- seurat.obj.new@meta.data %>% arrange(percent.mito)

p11 <- ggplot(seurat.obj.new@meta.data) + 
  geom_jitter(aes(orig.ident, nGene, colour=percent.mito), alpha=0.7, size=0.25) +
  geom_violin(aes(orig.ident, nGene), colour="grey60", alpha=0.1, scale="width") +
  scale_color_viridis(option="magma", begin = .15, end = .9) +
  theme_minimal() +
  theme(plot.title = element_text(size= 12), 
        axis.title.y=element_text(vjust = 2),
        axis.title.x=element_text(vjust = -1),
        axis.text.x = element_text(angle = 30)
        ) +
  scale_y_continuous(breaks=seq(0, max(seurat.obj.new@meta.data$nGene), by=500)) +
  scale_x_discrete(breaks=(statsByArea %>% arrange(mean.nGene))$orig.ident) +
  labs(title=paste0(age, ": Number of Genes Detected per Cell"),
       x="Area", 
       y="nGene"
      )

p11

12. Violin Plot: Genes per cell, color by % mitochondrial genes [zoom-in]

seurat.obj.new@meta.data <- seurat.obj.new@meta.data %>% arrange(percent.mito)

p12 <- ggplot(seurat.obj.new@meta.data %>% filter(nGene < nGene.limit)) + 
  geom_jitter(aes(orig.ident, nGene, colour=percent.mito), alpha=0.8, size=0.25) +
  geom_violin(aes(orig.ident, nGene), colour="grey60", alpha=0.1, scale="width") +
  scale_color_viridis(option="magma", begin = .15, end = .95) +
  theme_minimal() +
  theme(plot.title = element_text(size= 12, hjust = 0.5), 
        axis.title.y=element_text(vjust = 2),
        axis.title.x=element_text(vjust = -1),
        legend.position=c(0.85,1),
        legend.key.size = unit(0.5,"cm"),
        legend.direction = "horizontal",
        legend.text = element_text(angle = 30, size = 7),
        legend.title = element_text(size = 8),
        # legend.title.align = 0.9
        ) +
  scale_y_continuous(breaks=seq(0, nGene.limit, by=nGene.limit/10), limits = c(0, nGene.limit)) +
  scale_x_discrete(breaks=(statsByArea %>% arrange(mean.nGene))$orig.ident) +
  labs(title=paste0(age, ": Genes per Cell / % Mitochondrial Genes"),
       x="", 
       y="Genes per Cell",
       color="% mito"
      )

p12

13. Violin Plot: % Ribosomal, color by % mitochondrial.

seurat.obj.new@meta.data <- seurat.obj.new@meta.data %>% arrange(percent.mito)

p13 <- 
ggplot(seurat.obj.new@meta.data) + 
  geom_jitter(aes(orig.ident, percent.ribo, colour=percent.mito), alpha=0.7, size=0.25) + 
  geom_violin(aes(orig.ident, percent.ribo), colour="grey60", alpha=0.1, scale="width") +
  scale_color_viridis(option="magma", begin = .10, end = .90) +
  theme_minimal() +
  theme(plot.title = element_text(size= 12, hjust = 0.5), 
        axis.title.y=element_text(vjust = 2),
        axis.title.x=element_text(vjust = -1),
        legend.position=c(0.87,.95),
        legend.key.size = unit(0.5,"cm"),
        legend.direction = "horizontal",
        legend.text = element_text(angle = 30, size = 7),
        legend.title = element_text(size = 8),
        axis.text.x = element_text(angle = 30)
        # legend.title.align = 0.9
        ) +
  labs(title=paste0(age, ": % Ribosomal Genes / % Mitochondrial Genes per Cell"),
       x="", 
       y="% Ribosomal Genes",
       color="% mito"
      )

p13

14. Violin Plot: % Mitochondrial, color by % ribosomal.

seurat.obj.new@meta.data <- seurat.obj.new@meta.data %>% arrange(percent.ribo)

p14 <- 
ggplot(seurat.obj.new@meta.data) + 
  geom_jitter(aes(orig.ident, percent.mito, colour=percent.ribo), alpha=0.7, size=0.20) + 
  geom_violin(aes(orig.ident, percent.mito), colour="grey60", alpha=0.1, scale="width") +
  scale_color_viridis(option="magma", begin = .10, end = .95) +
  theme_minimal() +
  theme(plot.title = element_text(size= 12, hjust = 0.5), 
        axis.title.y=element_text(vjust = 2),
        axis.title.x=element_text(vjust = -1),
        legend.position=c(0.85,.93),
        legend.key.size = unit(0.5,"cm"),
        legend.direction = "horizontal",
        legend.text = element_text(angle = 30, size = 7),
        legend.title = element_text(size = 8),
        axis.text.x = element_text(angle = 30)
        # legend.title.align = 0.9
        ) +
  labs(title=paste0(age, ": % Mitochondrial Genes / % Ribosomal Genes per Cell"),
       x="", 
       y="% Mitochondrial Genes",
       color = "% ribosomal"
      )

p14

meta.from.seurat <- seurat.obj.new@meta.data
rm(seurat.obj.new, matrix_scaled, matrix_pca, ribo, cluster)